// NOTE(review): presumably consumed by the CTS harness to label this test file in
// listings — confirm against the framework's file-loader conventions.
export const description = `
Stress tests for GPUAdapter.requestDevice.
`;

import { Fixture } from '../../common/framework/fixture.js';
import { makeTestGroup } from '../../common/framework/test_group.js';
import { attemptGarbageCollection } from '../../common/util/collect_garbage.js';
import { keysOf } from '../../common/util/data_tables.js';
import { getGPU } from '../../common/util/navigator_gpu.js';
import { assert, iterRange } from '../../common/util/util.js';
import { getDefaultLimitsForCTS } from '../../webgpu/capability_info.js';

export const g = makeTestGroup(Fixture);

/**
 * Maps every adapter preference identifier (each `GPUPowerPreference` plus
 * 'fallback') to the `requestAdapter` options that select it.
 */
const kAdapterTypeOptions: {
  readonly [k in GPUPowerPreference | 'fallback']: GPURequestAdapterOptions;
} = {
  'low-power': { powerPreference: 'low-power', forceFallbackAdapter: false },
  'high-performance': { powerPreference: 'high-performance', forceFallbackAdapter: false },
  fallback: { powerPreference: undefined, forceFallbackAdapter: true },
};
/** All adapter hint types exercised by these tests. */
const kAdapterTypes = keysOf(kAdapterTypeOptions);

/**
 * Creates a device, a valid compute pipeline, valid resources for the pipeline, and
 * ties them together into a set of compute commands ready to be submitted to the GPU
 * queue. Does not submit the commands in order to make sure that all resources are
 * kept alive until the device is destroyed.
 */
async function createDeviceAndComputeCommands(t: Fixture, adapter: GPUAdapter) {
  // Constants are computed such that per run, this function should allocate roughly 2G
  // worth of data. This should be sufficient as we run these creation functions many
  // times. If the data backing the created objects is not recycled we should OOM.
  const limitInfo = getDefaultLimitsForCTS();
  const kNumPipelines = 64;
  const kNumBindgroups = 128;
  const kNumBufferElements =
    limitInfo.maxComputeWorkgroupSizeX.default * limitInfo.maxComputeWorkgroupSizeY.default;
  const kBufferSize = kNumBufferElements * 4;
  const kBufferData = new Uint32Array([...iterRange(kNumBufferElements, x => x)]);

  const device: GPUDevice = await t.requestDeviceTracked(adapter);
  const commands = [];

  for (let pipelineIndex = 0; pipelineIndex < kNumPipelines; ++pipelineIndex) {
    const pipeline = device.createComputePipeline({
      layout: 'auto',
      compute: {
        module: device.createShaderModule({
          code: `
              struct Buffer { data: array<u32>, };

              @group(0) @binding(0) var<storage, read_write> buffer: Buffer;
              @compute @workgroup_size(1) fn main(
                  @builtin(global_invocation_id) id: vec3<u32>) {
                buffer.data[id.x * ${limitInfo.maxComputeWorkgroupSizeX.default}u + id.y] =
                  buffer.data[id.x * ${limitInfo.maxComputeWorkgroupSizeX.default}u + id.y] +
                    ${pipelineIndex}u;
              }
            `,
        }),
        entryPoint: 'main',
      },
    });
    for (let bindgroupIndex = 0; bindgroupIndex < kNumBindgroups; ++bindgroupIndex) {
      const buffer = t.trackForCleanup(
        device.createBuffer({
          size: kBufferSize,
          usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
        })
      );
      device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length);
      const bindgroup = device.createBindGroup({
        layout: pipeline.getBindGroupLayout(0),
        entries: [{ binding: 0, resource: { buffer } }],
      });

      const encoder = device.createCommandEncoder();
      const pass = encoder.beginComputePass();
      pass.setPipeline(pipeline);
      pass.setBindGroup(0, bindgroup);
      pass.dispatchWorkgroups(
        limitInfo.maxComputeWorkgroupSizeX.default,
        limitInfo.maxComputeWorkgroupSizeY.default
      );
      pass.end();
      commands.push(encoder.finish());
    }
  }
  return { device, objects: commands };
}

/**
 * Creates a device, a valid render pipeline, valid resources for the pipeline, and
 * ties them together into a set of render commands ready to be submitted to the GPU
 * queue. Does not submit the commands in order to make sure that all resources are
 * kept alive until the device is destroyed.
 */
async function createDeviceAndRenderCommands(t: Fixture, adapter: GPUAdapter) {
  // Constants are computed such that per run, this function should allocate roughly 2G
  // worth of data. This should be sufficient as we run these creation functions many
  // times. If the data backing the created objects is not recycled we should OOM.
  const kNumPipelines = 128;
  const kNumBindgroups = 128;
  // Side length of the square render target; also used to size the uniform buffer
  // (kSize * kSize u32 indices, one per drawn point).
  const kSize = 128;
  const kBufferData = new Uint32Array([...iterRange(kSize * kSize, x => x)]);

  const device: GPUDevice = await t.requestDeviceTracked(adapter);
  const commands = [];

  for (let pipelineIndex = 0; pipelineIndex < kNumPipelines; ++pipelineIndex) {
    // The fragment shader interpolates a per-pipeline constant, making each
    // module/pipeline distinct so they cannot be deduplicated by caching.
    const module = device.createShaderModule({
      code: `
          struct Buffer { data: array<vec4<u32>, ${(kSize * kSize) / 4}>, };

          @group(0) @binding(0) var<uniform> buffer: Buffer;
          @vertex fn vmain(
            @builtin(vertex_index) vertexIndex: u32
          ) -> @builtin(position) vec4<f32> {
            let index = buffer.data[vertexIndex / 4u][vertexIndex % 4u];
            let position = vec2<f32>(f32(index % ${kSize}u), f32(index / ${kSize}u));
            let r = vec2<f32>(1.0 / f32(${kSize}));
            let a = 2.0 * r;
            let b = r - vec2<f32>(1.0);
            return vec4<f32>(fma(position, a, b), 0.0, 1.0);
          }

          @fragment fn fmain() -> @location(0) vec4<f32> {
            return vec4<f32>(${pipelineIndex}.0 / ${kNumPipelines}.0, 0.0, 0.0, 1.0);
          }
        `,
    });
    const pipeline = device.createRenderPipeline({
      // Explicit layout (rather than 'auto') with a single uniform-buffer binding
      // visible to the vertex stage.
      layout: device.createPipelineLayout({
        bindGroupLayouts: [
          device.createBindGroupLayout({
            entries: [
              {
                binding: 0,
                visibility: GPUShaderStage.VERTEX,
                buffer: { type: 'uniform' },
              },
            ],
          }),
        ],
      }),
      // No vertex buffers: positions are derived from vertex_index via the
      // uniform buffer in vmain above.
      vertex: { module, entryPoint: 'vmain', buffers: [] },
      primitive: { topology: 'point-list' },
      fragment: {
        targets: [{ format: 'rgba8unorm' }],
        module,
        entryPoint: 'fmain',
      },
    });
    for (let bindgroupIndex = 0; bindgroupIndex < kNumBindgroups; ++bindgroupIndex) {
      const buffer = t.trackForCleanup(
        device.createBuffer({
          size: kSize * kSize * 4,
          usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
        })
      );
      // Write real data so the buffer has backing memory allocated.
      device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length);
      const bindgroup = device.createBindGroup({
        layout: pipeline.getBindGroupLayout(0),
        entries: [{ binding: 0, resource: { buffer } }],
      });
      // A fresh render target per bind group so every command buffer references
      // its own texture allocation.
      const texture = t.trackForCleanup(
        device.createTexture({
          size: [kSize, kSize],
          usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
          format: 'rgba8unorm',
        })
      );

      const encoder = device.createCommandEncoder();
      const pass = encoder.beginRenderPass({
        colorAttachments: [
          {
            view: texture.createView(),
            loadOp: 'load',
            storeOp: 'store',
          },
        ],
      });
      pass.setPipeline(pipeline);
      pass.setBindGroup(0, bindgroup);
      // One point per texel.
      pass.draw(kSize * kSize);
      pass.end();
      // Finish but do not submit; see function comment — commands are returned
      // unsubmitted to keep all referenced resources alive.
      commands.push(encoder.finish());
    }
  }
  return { device, objects: commands };
}

/**
 * Creates a device and a large number of buffers which are immediately written to. The
 * buffers are expected to be kept alive until they or the device are destroyed.
 */
async function createDeviceAndBuffers(t: Fixture, adapter: GPUAdapter) {
  // Currently we just allocate 2G of memory using 512MB blocks. We may be able to
  // increase this to hit OOM instead, but on integrated GPUs on Metal, this can cause
  // kernel panics at the moment, and it can greatly increase the time needed.
  const kTotalMemorySize = 2 * 1024 * 1024 * 1024;
  const kMemoryBlockSize = 512 * 1024 * 1024;
  const kMemoryBlockData = new Uint8Array(kMemoryBlockSize);

  const device: GPUDevice = await t.requestDeviceTracked(adapter);
  const buffers = [];
  for (let memory = 0; memory < kTotalMemorySize; memory += kMemoryBlockSize) {
    const buffer = t.trackForCleanup(
      device.createBuffer({
        size: kMemoryBlockSize,
        usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
      })
    );

    // Write out to the buffer to make sure that it has backing memory.
    device.queue.writeBuffer(buffer, 0, kMemoryBlockData, 0, kMemoryBlockData.length);
    buffers.push(buffer);
  }
  return { device, objects: buffers };
}

g.test('coexisting')
  .desc(`Tests allocation of many coexisting GPUDevice objects.`)
  .params(u => u.combine('adapterType', kAdapterTypes))
  .fn(async t => {
    const { adapterType } = t.params;
    const adapter = await getGPU(t.rec).requestAdapter(kAdapterTypeOptions[adapterType]);
    assert(adapter !== null, 'Failed to get adapter.');

    // Based on Vulkan conformance test requirement to be able to create multiple devices.
    const kNumDevices = 5;

    const devices = [];
    for (let i = 0; i < kNumDevices; ++i) {
      const device = await t.requestDeviceTracked(adapter);
      devices.push(device);
    }
  });

g.test('continuous,with_destroy')
  .desc(
    `Tests allocation and destruction of many GPUDevice objects over time. Device objects
are sequentially requested with a series of device allocated objects created on each
device. The devices are then destroyed to verify that the device and the device allocated
objects are recycled over a very large number of iterations.`
  )
  .params(u => u.combine('adapterType', kAdapterTypes))
  .fn(async t => {
    const { adapterType } = t.params;
    const adapter = await getGPU(t.rec).requestAdapter(kAdapterTypeOptions[adapterType]);
    assert(adapter !== null, 'Failed to get adapter.');

    // Since devices are being destroyed, we should be able to create many devices.
    const kNumDevices = 100;
    // Creation helpers are applied round-robin so every kind of allocation is hit.
    const kFunctions = [
      createDeviceAndBuffers,
      createDeviceAndComputeCommands,
      createDeviceAndRenderCommands,
    ];

    // Retain all devices and objects so only destroy() — not GC — can recycle them.
    const deviceList = [];
    const objectLists = [];
    for (let i = 0; i < kNumDevices; ++i) {
      const createDeviceAndObjects = kFunctions[i % kFunctions.length];
      const { device, objects } = await createDeviceAndObjects(t, adapter);
      t.expect(objects.length > 0, 'unable to allocate any objects');
      deviceList.push(device);
      objectLists.push(objects);
      device.destroy();
    }
  });

g.test('continuous,no_destroy')
  .desc(
    `Tests allocation and implicit GC of many GPUDevice objects over time. Objects are
sequentially requested and dropped for GC over a very large number of iterations. Note
that without destroy, we do not create device allocated objects because that will
implicitly keep the device in scope.`
  )
  .params(u => u.combine('adapterType', kAdapterTypes))
  .fn(async t => {
    const { adapterType } = t.params;
    const adapter = await getGPU(t.rec).requestAdapter(kAdapterTypeOptions[adapterType]);
    assert(adapter !== null, 'Failed to get adapter.');

    const kNumDevices = 10_000;
    for (let i = 1; i <= kNumDevices; ++i) {
      // Request the device inside an immediately-invoked async function so the
      // reference goes out of scope right away and the device is eligible for GC.
      await (async () => {
        // No trackForCleanup because it would prevent the GPUDevice from being GCed.
        // eslint-disable-next-line no-restricted-syntax
        const device = await adapter.requestDevice();
        t.expect(device !== null, 'unexpected null device');
      })();
      if (i % 10 === 0) {
        // We need to occasionally wait for GC to clear out stale devices.
        await attemptGarbageCollection();
      }
    }
  });
